import requests
from lxml import etree


def get_html(main_url, timeout=10):
    """Download a page and return its body as text.

    Args:
        main_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The response body as a string, decoded with the encoding that
        requests detects from the content (``apparent_encoding``).

    Raises:
        requests.RequestException: If the connection fails or times out.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    # Fetch the URL that was passed in, not a module-level name, so the
    # function also works when it is imported and called from elsewhere.
    res = requests.get(main_url, headers=header, timeout=timeout)
    # Decode using the encoding detected from the body itself.
    res.encoding = res.apparent_encoding
    return res.text


def get_url_lists(html):
    """Extract the link targets found under ``<div class="hd">`` elements.

    Args:
        html: HTML source of the page to scan.

    Returns:
        The list of ``href`` values of the ``<a>`` children of every
        ``div`` whose class attribute is exactly ``hd``.
    """
    link_query = '//div[@class="hd"]/a/@href'
    return etree.HTML(html).xpath(link_query)


def movie_concat(html):
    """Build the text block (heading plus hot comments) for one page.

    Args:
        html: HTML source of the page to parse.

    Returns:
        A string made of a newline, the first heading text found under
        ``div.mod-hd > h2 > i``, a newline, and then every hot-comment text
        joined with newlines. The heading is empty when the page has no
        matching element, so one unexpected page does not abort the run.
    """
    tree = etree.HTML(html)
    # The leading space in the class value is matched literally by XPath;
    # presumably it mirrors the site's markup -- confirm before changing it.
    comments = tree.xpath('//div[@id="hot-comments"]//p[@class=" comment-content"]/span/text()')
    headings = tree.xpath('//div[@class="mod-hd"]/h2/i/text()')
    # Guard against a missing heading instead of indexing an empty list.
    heading = headings[0] if headings else ''
    return '\n' + heading + '\n' + '\n'.join(comments)


def save_concat(movie_concat):
    """Append the given text to ``短评.txt`` in the working directory.

    Args:
        movie_concat: Text to append; it is written as-is, UTF-8 encoded.
    """
    target = '短评.txt'
    # Append mode keeps whatever earlier calls have already written.
    with open(target, 'a+', encoding='utf-8') as out_file:
        out_file.write(movie_concat)


if __name__ == '__main__':
    # Start from the first listing page and collect the links it contains.
    url = 'https://movie.douban.com/top250?start=0'
    listing_html = get_html(url)
    # `url` is rebound to each collected link in turn.
    for url in get_url_lists(listing_html):
        detail_html = get_html(url)
        # Append this page's heading and hot comments to the output file.
        save_concat(movie_concat(detail_html))
